3  Figures

This document explains how to reproduce the figures presented in the paper.

3.1 Install Packages

We install the following packages using the groundhog package manager to increase computational reproducibility.

options(repos = c(CRAN = "https://cran.r-project.org")) 


if (!requireNamespace("groundhog", quietly = TRUE)) {
    install.packages("groundhog")
}

pkgs <- c("magrittr", "data.table", "stringr", "lubridate", "knitr", "glue",
          "sandwich", "lmtest",
          "ggplot2", "ggpubr", "rstatix", "patchwork")

groundhog::groundhog.library(pkg = pkgs,
                             date = "2024-08-01")

rm(pkgs)

3.2 Read Data

# data <- data.table::fread(file = "../data/processed/full.csv")
data <- readRDS(file="../data/processed/full.Rda")

3.3 Figure 1

To create Figure 3.1 (and fig-2), we create a wrapper function that we can call several times. As all the other figures presented in this document, Figure 3.1 consists of three panels, top, left, and right that are relatively similar. We thus, save both space and sources of error by creating a wrapper function plot_bars() that creates bar plots and annotates them with test statistics.

plot_bars <- function(response = "b", surprise_sub = NA, limits = ylim(-1.02, 1.02)){
  
  if(response == "b"){
      y_1 = 0.6
      y_2 = 0.4
    } else {
      y_1 = 1.4
      y_2 = 1
    }
  
  if(!is.na(surprise_sub)){
    # Plot bottom panels
    tmp <- data[surprise == surprise_sub]
    names(tmp)[names(tmp) == response] <- 'outcome'
    
    if(surprise_sub){
      title <- "Surprising Condition"
    } else {
      title <- "Confirming Condition"
    }
    
    test_stats_1 <- tmp %>% 
      group_by(communication) %>%
      wilcox_test(formula = outcome ~ stage,
                  paired = T) %>% 
      adjust_pvalue(method = "none") %>%
      add_significance(p.col = "p.adj",
                       cutpoints = c(0, 0.01, 0.05, 0.1, 1),
                       symbols = c( "***", "**", "*", "ns")) %>%
      as.data.table()
    
    
    test_stats_2 <- tmp %>% 
      group_by(stage) %>%
      wilcox_test(formula = outcome ~ communication) %>% 
      adjust_pvalue(method = "none") %>%
      add_significance(p.col = "p.adj",
                       cutpoints = c(0, 0.01, 0.05, 0.1, 1),
                       symbols = c( "***", "**", "*", "ns")) %>%
      as.data.table()
    test_stats_2 <- test_stats_2[stage == 2]
    
    plot_bottom <- ggplot(data = tmp,
           mapping = aes(x = as.factor(communication),
                         y = outcome)) +
        geom_bar(aes(fill = stage),
                 position = "dodge", 
                 stat = "summary", 
                 fun = "mean") + 
      limits +
      scale_fill_manual(values=c("black", "gray")) +
      theme_classic() +
      stat_pvalue_manual(data = test_stats_2,
                         label = "{p} ({p.adj.signif})", 
                         step.group.by = "stage",
                         tip.length = 0, 
                         step.increase = 0.1, 
                         y.position = y_1) +
      stat_pvalue_manual(data = test_stats_1,
                         label = "{p} ({p.adj.signif})",
                         y.position = y_2,
                         tip.length = 0,
                         x = "communication") +
      labs(title = title,
           x = "Communication",
           y = glue("Ambiguity index {response} (Baillon)"))
    
    rm(tmp)
    
    plot_bottom
  } else {
    # Plot the top panel
    tmp <- data
    names(tmp)[names(tmp) == response] <- 'outcome'
    
    title <- "Both Conditions"
    
    test_stats_1 <- tmp %>% 
      group_by(surprise) %>%
      wilcox_test(formula = outcome ~ stage,
                  paired = T) %>% 
      adjust_pvalue(method = "none") %>%
      add_significance(p.col = "p.adj",
                       cutpoints = c(0, 0.01, 0.05, 0.1, 1),
                       symbols = c( "***", "**", "*", "ns")) %>%
      as.data.table()
    
    
    test_stats_2 <- tmp %>% 
      group_by(stage) %>%
      wilcox_test(formula = outcome ~ surprise) %>% 
      adjust_pvalue(method = "none") %>%
      add_significance(p.col = "p.adj",
                       cutpoints = c(0, 0.01, 0.05, 0.1, 1),
                       symbols = c( "***", "**", "*", "ns")) %>%
      as.data.table()
    test_stats_2 <- test_stats_2[stage == 2]
    
    
    plot_top <- ggplot(data = tmp,
           mapping = aes(x = as.factor(surprise),
                         y = outcome)) +
        geom_bar(aes(fill = stage),
                 position = "dodge", 
                 stat = "summary", 
                 fun = "mean") + 
      limits +
      scale_fill_manual(values=c("black", "gray")) +
      theme_classic() +
      stat_pvalue_manual(data = test_stats_2,
                         label = "{p} ({p.adj.signif})", 
                         step.group.by = "stage",
                         tip.length = 0, 
                         step.increase = 0.1, 
                         y.position = y_1) +
      stat_pvalue_manual(data = test_stats_1,
                         label = "{p} ({p.adj.signif})",
                         y.position = y_2,
                         tip.length = 0,
                         x = "surprise") +
      labs(title = title,
           x = " Surprising Condition",
           y = glue("Ambiguity index {response} (Baillon)"))
    
    rm(tmp)
    
    plot_top
  }
}
top   <- plot_bars(response = "b", surprise_sub = NA)
left  <- plot_bars(response = "b", surprise_sub = FALSE)
right <- plot_bars(response = "b", surprise_sub = TRUE)

(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")

Figure 3.1: Means of ambiguity index b separated by treatments and part 1 and part 2. P-values of Wilcoxon signed-rank test comparing part 1 and 2 directly above the mean values. P-values of Wilcoxon–Mann–Whitney test comparing part 2 of different treatments at the top. Note: ∗p<0.10, ∗∗p<0.05, ∗∗∗p<0.01, ns: not significant

3.4 Figure 2

Next, we use the wrapper function again but visualize another outcome using the response == a argument.

top   <- plot_bars(response = "a", limits = ylim(-2.01, 4.01), surprise_sub = NA)
left  <- plot_bars(response = "a", limits = ylim(-2.01, 4.01), surprise_sub = FALSE)
right <- plot_bars(response = "a", limits = ylim(-2.01, 4.01), surprise_sub = TRUE)

(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")

Figure 3.2: Means of ambiguity index a separated by treatments and part 1 and part 2. P-values of Wilcoxon signed-rank test comparing part 1 and 2 directly above the mean values. P-values of Wilcoxon–Mann–Whitney test comparing part 2 of different treatments at the top. Note: ∗p<0.10, ∗∗p<0.05, ∗∗∗p<0.01, ns: not significant

3.5 Figure 3

Figure 3.3 also presents three panels. In contrast to Figure 3.1 and Figure 3.2, these panels visualize confidence intervals of our estimator of interest: the interaction of stage and surprise (top panel) as well as the interaction between stage and communication in the lower two panels.

The corresponding data stems from many OLS regressions and are computed in for-loops. Even though the code differs slightly, it is very repetitive between the three panels, which is why we only explain the top panel

3.5.1 Top Panel

Before calculating the data based on OLS regressions, we specify three different models: none represents only treatment variables, demographics extends the former by adding demographic variables, and all extends the former two by also adding all remaining covariates.

none <- c("surprise", "treated", "surprise*treated")

demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")

all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")

Next, we run a nested loop that iterates through all three models and within these models through both ambiguity indices a and b. From these ols regressions, we compute clustered standard errors using the {{sandwich}} and {{lmtest}} package. The resuling data is stored in a temporary data.table that is appended to an initially empty data.table called ci_top.

ci_top <- data.table()
for(RHS in c("none", "demographics", "all")){
  for(LHS in c("a", "b")){
    ols <- lm(formula = reformulate(termlabels = get(RHS), 
                                    response = LHS),
              data = data)
    
    tmp <- coefci(x = ols,
                  parm = "surpriseTRUE:treatedTRUE",
                  vcov = vcovCL(x = ols, 
                                cluster = data$participant.label, 
                                type = "HC1"),
                  level = 0.95) %>% 
      data.table(model = RHS, outcome = LHS)
    tmp[, center := (`2.5 %` + `97.5 %`)/2]
    
    ci_top <- rbind(ci_top, tmp)
  }
}

The result looks as follows. The data.table provides a column for the 2.5% and 97.5% confidence interval, the center of the two, as well as the ols specification as presented in model and outcome.

ci_top %>% head(n = 7) %>% kable()
2.5 % 97.5 % model outcome center
-0.0461365 0.0608289 none a 0.0073462
-0.0085981 0.0374200 none b 0.0144110
-0.0370710 0.0733050 demographics a 0.0181170
-0.0071204 0.0418846 demographics b 0.0173821
-0.0371322 0.0733663 all a 0.0181170
-0.0071476 0.0419118 all b 0.0173821

Finally, we plot this data set (ci_top) using the {{ggplot2}} package. The resulting object is stored as top and will be assembled together with the other two panels.

top <- ggplot(data = ci_top,
       mapping = aes(y = outcome)) +
  geom_pointrange(mapping = aes(x = center,
                                y = outcome,
                                xmin = `2.5 %`,
                                xmax = `97.5 %`,
                                shape = factor(model)),
                  data = ci_top,
                  position = position_dodge(width = 0.4),
                  fatten=5,
                  alpha=.8) +
  geom_vline(xintercept = 0, 
             color = "red", 
             alpha = 0.66, 
             show.legend = FALSE) +
  theme_bw() +
  labs(#title = "(a) Effect of contradiction (relative to confirmation)",
       y = "Ambiguity Index", 
       x = "Estimate",
       shape="Control variables")

3.5.2 Left Panel

none <- c("communication", "treated", "communication*treated")

demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")

all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")
ci_left <- data.table()
for(RHS in c("none", "demographics", "all")){
  for(LHS in c("a", "b")){
    ols <- lm(formula = reformulate(termlabels = get(RHS), 
                                    response = LHS),
              data = data,
              subset = (surprise == FALSE))
    for(communication in c("interval", "both")){
      tmp <- coefci(x = ols,
                    parm = paste0("communication", communication, ":treatedTRUE"),
                    vcov = vcovCL(x = ols, 
                                  cluster = data[surprise == FALSE,
                                                 participant.label], 
                                  type = "HC1"),
                    level = 0.95) %>% 
        data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
      tmp[, center := (`2.5 %` + `97.5 %`)/2]
      
      ci_left <- rbind(ci_left, tmp)
    }
  }
}
ci_left %>% head(n = 7) %>% kable()
2.5 % 97.5 % model outcome center
-0.1231424 0.0699195 none a (interval) -0.0266115
-0.1603397 0.0317751 none a (both) -0.0642823
-0.0085202 0.0705807 none b (interval) 0.0310303
-0.0069006 0.0744238 none b (both) 0.0337616
-0.1032781 0.0924975 demographics a (interval) -0.0053903
-0.1519774 0.0393496 demographics a (both) -0.0563139
-0.0106052 0.0750051 demographics b (interval) 0.0321999
left <- ggplot(data = ci_left,
       mapping = aes(y = outcome)) +
  geom_pointrange(mapping = aes(x = center,
                                y = outcome,
                                xmin = `2.5 %`,
                                xmax = `97.5 %`,
                                shape = factor(model)),
                  data = ci_left,
                  position = position_dodge(width = 0.4),
                  fatten=5,
                  alpha=.8) +
  geom_vline(xintercept = 0, 
             color = "red", 
             alpha = 0.66, 
             show.legend = FALSE) +
  theme_bw() +
  labs(#title = "(b) Effect of information treatments (both, interval) relative to best guess, confirmation treatments",
       y = "Ambiguity Index", 
       x = "Estimate",
       shape="Control variables")

3.5.3 Right Panel

ci_right <- data.table()
for(RHS in c("none", "demographics", "all")){
  for(LHS in c("a", "b")){
    ols <- lm(formula = reformulate(termlabels = get(RHS), 
                                    response = LHS),
              data = data,
              subset = (surprise == TRUE))
    for(communication in c("interval", "both")){
      tmp <- coefci(x = ols,
                    parm = paste0("communication", communication, ":treatedTRUE"),
                    vcov = vcovCL(x = ols, 
                                  cluster = data[surprise == TRUE,
                                                 participant.label], 
                                  type = "HC1"),
                    level = 0.95) %>% 
        data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
      tmp[, center := (`2.5 %` + `97.5 %`)/2]
      
      ci_right <- rbind(ci_right, tmp)
    }
  }
}
ci_right %>% head(n = 7) %>% kable()
2.5 % 97.5 % model outcome center
-0.0664967 0.1147544 none a (interval) 0.0241288
-0.0495534 0.1325661 none a (both) 0.0415063
-0.0339037 0.0457592 none b (interval) 0.0059278
-0.0619626 0.0165198 none b (both) -0.0227214
-0.0798350 0.1115715 demographics a (interval) 0.0158682
-0.0675663 0.1267932 demographics a (both) 0.0296134
-0.0490060 0.0352795 demographics b (interval) -0.0068632
right <- ggplot(data = ci_right,
       mapping = aes(y = outcome)) +
  geom_pointrange(mapping = aes(x = center,
                                y = outcome,
                                xmin = `2.5 %`,
                                xmax = `97.5 %`,
                                shape = factor(model)),
                  data = ci_right,
                  position = position_dodge(width = 0.4),
                  fatten=5,
                  alpha=.8) +
  geom_vline(xintercept = 0, 
             color = "red", 
             alpha = 0.66, 
             show.legend = FALSE) +
  theme_bw() +
  labs(#title = "(c) Effect of information treatments (both, interval) relative to best guess, contradiction treatments",
       y = "Ambiguity Index", 
       x = "Estimate",
       shape="Control variables") +
  scale_y_discrete(position = "right")

3.5.4 Assemble Top, Left and Right Panels

As before, we use the {{patchwork}} package to assmble the plot objects.

(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")

Figure 3.3: Treatment effects of regression equation (1) with dependent variables b and a. Estimators with 95% confidence intervals. The underlying standard errors (“HC1”) are clustered at the individual level and estimated with the R package sandwich (Zeileis, 2004; Zeileis et al., 2020).

3.6 Figure 4

The process of producing Figure 3.4 is very similar to the process of Figure 3.3. The only difference is that we do not loop through the outcomes a and b but the events’ matching probabilities (E1, E2, ..., E23).

To loop through these variables, we use a regex (E\\d) to create a vector called events.

events <- names(data) %>% str_subset(pattern = "E\\d")

Because the process is, from now on, so similar to the process of Figure 3.3, we do not comment it any further here.

3.6.1 Top Panel

none <- c("surprise", "treated", "surprise*treated")

demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")

all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")
ci_top <- data.table()
for(RHS in c("none", "demographics", "all")){
  for(LHS in events){
    ols <- lm(formula = reformulate(termlabels = get(RHS), 
                                    response = LHS),
              data = data)
    
    tmp <- coefci(x = ols,
                  parm = "surpriseTRUE:treatedTRUE",
                  vcov = vcovCL(x = ols, 
                                cluster = data$participant.label, 
                                type = "HC1"),
                  level = 0.95) %>% data.table(model = RHS, outcome = LHS)
    tmp[, center := (`2.5 %` + `97.5 %`)/2]
    
    ci_top <- rbind(ci_top, tmp)
    
    
    ci_top[, outcome := as.factor(outcome)]
    ci_top[, outcome := factor(outcome, 
                               levels = rev(c("E1", "E2", "E3", "E12", "E13", "E23")))]
  }
}
ci_top %>% head(n = 7) %>% kable()
2.5 % 97.5 % model outcome center
4.983983 10.142448 none E1 7.563215
1.600247 6.299091 none E12 3.949669
1.327104 6.115620 none E13 3.721362
-7.842483 -3.429947 none E2 -5.636215
-12.800137 -7.599836 none E23 -10.199987
-6.107878 -1.334793 none E3 -3.721335
4.421240 9.844463 demographics E1 7.132852
top <- ggplot(data = ci_top,
       mapping = aes(y = outcome)) +
  geom_pointrange(mapping = aes(x = center,
                                y = outcome,
                                xmin = `2.5 %`,
                                xmax = `97.5 %`,
                                shape = factor(model)),
                  data = ci_top,
                  position = position_dodge(width = 0.4),
                  fatten=5,
                  alpha=.8) +
  geom_vline(xintercept = 0, 
             color = "red", 
             alpha = 0.66, 
             show.legend = FALSE) +
  theme_bw() +
  labs(y = "Ambiguity Index", 
       x = "Estimate",
       shape="Control variables")

3.6.2 Left Panel

none <- c("communication", "treated", "communication*treated")

demographics <- c(none, "age_35_52","age_53_plus","female", "high_education", "high_income", "married", "parentship")

all <- c(demographics, "high_temperature","high_usage","high_general_risk","high_weather_risk","high_accuracy","high_credibility")
ci_left <- data.table()
for(RHS in c("none", "demographics", "all")){
  for(LHS in events){
    ols <- lm(formula = reformulate(termlabels = get(RHS), 
                                    response = LHS),
              data = data,
              subset = (surprise == FALSE))
    for(communication in c("interval", "both")){
      tmp <- coefci(x = ols,
                    parm = paste0("communication", communication, ":treatedTRUE"),
                    vcov = vcovCL(x = ols, 
                                  cluster = data[surprise == FALSE,
                                                 participant.label], 
                                  type = "HC1"),
                    level = 0.95) %>% 
        data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
      tmp[, center := (`2.5 %` + `97.5 %`)/2]
      
      ci_left <- rbind(ci_left, tmp)
    }
  }
}
ci_left %>% head(n = 7) %>% kable()
2.5 % 97.5 % model outcome center
-6.746653 1.5693594 none E1 (interval) -2.5886468
-7.472010 0.5541086 none E1 (both) -3.4589505
-7.466795 0.4839339 none E12 (interval) -3.4914306
-4.297893 3.4859217 none E12 (both) -0.4059856
-2.758720 5.5611888 none E13 (interval) 1.4012346
-6.134510 2.0764806 none E13 (both) -2.0290148
-4.899051 2.4400333 none E2 (interval) -1.2295086
left <- ggplot(data = ci_left,
       mapping = aes(y = outcome)) +
  geom_pointrange(mapping = aes(x = center,
                                y = outcome,
                                xmin = `2.5 %`,
                                xmax = `97.5 %`,
                                shape = factor(model)),
                  data = ci_left,
                  position = position_dodge(width = 0.4),
                  fatten=5,
                  alpha=.8) +
  geom_vline(xintercept = 0, 
             color = "red", 
             alpha = 0.66, 
             show.legend = FALSE) +
  theme_bw() +
  labs(y = "Ambiguity Index", 
       x = "Estimate",
       shape="Control variables")

3.6.3 Right Panel

ci_right <- data.table()
for(RHS in c("none", "demographics", "all")){
  for(LHS in events){
    ols <- lm(formula = reformulate(termlabels = get(RHS), 
                                    response = LHS),
              data = data,
              subset = (surprise == TRUE))
    for(communication in c("interval", "both")){
      tmp <- coefci(x = ols,
                    parm = paste0("communication", communication, ":treatedTRUE"),
                    vcov = vcovCL(x = ols, 
                                  cluster = data[surprise == TRUE,
                                                 participant.label], 
                                  type = "HC1"),
                    level = 0.95) %>% 
        data.table(model = RHS, outcome = paste0(LHS, " (", communication, ")"))
      tmp[, center := (`2.5 %` + `97.5 %`)/2]
      
      ci_right <- rbind(ci_right, tmp)
    }
  }
}
ci_right %>% head(n = 7) %>% kable()
2.5 % 97.5 % model outcome center
-9.6135941 -0.0515961 none E1 (interval) -4.8325951
-2.8115517 7.0630438 none E1 (both) 2.1257460
-4.0358433 4.3361386 none E12 (interval) 0.1501477
-0.4741903 8.1055554 none E12 (both) 3.8156825
-8.3619000 -0.3107652 none E13 (interval) -4.3363326
-6.0444150 2.5918118 none E13 (both) -1.7263016
-1.4998061 6.0782483 none E2 (interval) 2.2892211
right <- ggplot(data = ci_right,
       mapping = aes(y = outcome)) +
  geom_pointrange(mapping = aes(x = center,
                                y = outcome,
                                xmin = `2.5 %`,
                                xmax = `97.5 %`,
                                shape = factor(model)),
                  data = ci_right,
                  position = position_dodge(width = 0.4),
                  fatten=5,
                  alpha=.8) +
  geom_vline(xintercept = 0, 
             color = "red", 
             alpha = 0.66, 
             show.legend = FALSE) +
  theme_bw() +
  labs(y = "Ambiguity Index", 
       x = "Estimate",
       shape="Control variables") +
  scale_y_discrete(position = "right")

3.6.4 Assemble Top, Left and Right Panels

(top / (left | right) & theme(legend.position = "bottom")) + plot_layout(guides = "collect")

Figure 3.4: Treatment effects of regression equation (1) with matching probabilities for all events as depen- dent variables. Estimators with 95% confidence intervals. The underlying standard errors (“HC1”) are clustered at the individual level and estimated with the R package sandwich (Zeileis, 2004; Zeileis et al., 2020).

Session Info

sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: x86_64-apple-darwin20
Running under: macOS Sonoma 14.4.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] patchwork_1.2.0   rstatix_0.7.2     ggpubr_0.6.0      ggplot2_3.5.1    
 [5] lmtest_0.9-40     zoo_1.8-12        sandwich_3.1-0    glue_1.7.0       
 [9] knitr_1.48        lubridate_1.9.3   stringr_1.5.1     data.table_1.15.4
[13] magrittr_2.0.3   

loaded via a namespace (and not attached):
 [1] utf8_1.2.4        generics_0.1.3    tidyr_1.3.1       stringi_1.8.4    
 [5] lattice_0.22-6    digest_0.6.36     evaluate_0.24.0   grid_4.4.1       
 [9] timechange_0.3.0  fastmap_1.2.0     jsonlite_1.8.8    backports_1.5.0  
[13] groundhog_3.2.0   purrr_1.0.2       fansi_1.0.6       scales_1.3.0     
[17] abind_1.4-5       cli_3.6.3         rlang_1.1.4       munsell_0.5.1    
[21] withr_3.0.1       yaml_2.3.10       tools_4.4.1       parallel_4.4.1   
[25] ggsignif_0.6.4    dplyr_1.1.4       colorspace_2.1-1  broom_1.0.6      
[29] vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4   car_3.1-2        
[33] htmlwidgets_1.6.4 pkgconfig_2.0.3   pillar_1.9.0      gtable_0.3.5     
[37] xfun_0.46         tibble_3.2.1      tidyselect_1.2.1  rstudioapi_0.16.0
[41] farver_2.1.2      htmltools_0.5.8.1 labeling_0.4.3    carData_3.0-5    
[45] rmarkdown_2.27    compiler_4.4.1